import sys
sys.path.append('../../')
%%capture
import pandas as pd
import cormerant as cmt
import scanpy as sc
import squidpy as sq
from matplotlib import pyplot as plt
import os
import numpy as np
import geopandas as gpd
import shapely
os.chdir('/Users/DouglasHannumJr/Desktop/s3_bucket_data/s3_bucket_data/')
os.listdir()
['adata.h5ad', 'adata.pkl', 'all_cell_nbhd.parquet', 'anndata.h5ad', 'anndata.pickle', 'cell_boundaries.parquet', 'cell_by_gene.csv', 'cell_metadata.csv', 'images', 'leiden_alpha.csv', 'meta_cell_with_clusters.csv', 'neuro_panel_cluster_annotation.csv', 'partitioned_transcripts.csv', 'partitioned_transcripts.parquet', 'partitioned_transcripts_mill.csv', 'partitioned_transcripts_tenmill.csv', 'spat.Rds', 'updated_cell_metadata.csv']
adata = sq.read.vizgen('./',
counts_file = 'cell_by_gene.csv',
meta_file='cell_metadata.csv',
)
md = pd.read_csv('./cell_metadata.csv',index_col=0)
adata.obs['EntityID'] = md.index
plt.hist(np.log10(adata.obs['transcript_count']))
plt.show()
adata.var_names_make_unique()
sc.pp.filter_cells(adata, min_counts = 50)
%%time
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
sc.pp.scale(adata, max_value=10)
sc.tl.pca(adata, svd_solver='arpack')
sc.pp.neighbors(adata, n_neighbors=10, n_pcs=20)
sc.tl.umap(adata)
sc.tl.leiden(adata, resolution = .5)
C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\umap\distances.py:1063: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details. @numba.jit() C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\umap\distances.py:1071: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details. @numba.jit() C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\umap\distances.py:1086: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details. @numba.jit() C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\umap\umap_.py:660: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details. @numba.jit()
CPU times: total: 2min 52s Wall time: 2min 14s
sc.set_figure_params(figsize=(10,10))
sc.pl.umap(adata, color = ['leiden'], size = 5)
C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\scanpy\plotting\_tools\scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
sq.pl.spatial_scatter(
adata,
shape = None, color = 'leiden', size = 0.5,
library_id = 'spatial', figsize = (10,10))
C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\squidpy\pl\_spatial_utils.py:956: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored _cax = scatter(
markers = pd.read_csv('neuro_panel_cluster_annotation.csv', index_col = 0)
markers.index = markers.gene
markers.head()
| gene | Class | ClusterName | Description | Neurotransmitter | Region | |
|---|---|---|---|---|---|---|
| gene | ||||||
| Igf2 | Igf2 | Vascular | VECA | Vascular endothelial cells, arterial | NaN | CNS |
| Ngfr | Ngfr | Neurons | SYNOR4 | Noradrenergic erector muscle neurons | Noradrenaline | Sympathetic ganglion |
| Ccnd2 | Ccnd2 | Neurons | SZNBL | Neuronal intermidate progenitor cells | NaN | Striatum dorsal, Striatum ventral, Dentate gyrus |
| Th | Th | Neurons | SYNOR4 | Noradrenergic erector muscle neurons | Noradrenaline | Sympathetic ganglion |
| Lhx2 | Lhx2 | Neurons | TEGLU13 | Excitatory neurons, cerebral cortex | Glutamate (VGLUT1,VGLUT2), Nitric oxide | Cortex |
adata.var = pd.merge(adata.var, markers, how = 'left',
left_index = True,
right_index = True)
adata.var.Class.value_counts()
Class Neurons 106 Vascular 13 Ependymal 5 Oligos 5 Astrocytes 5 Immune 4 PeripheralGlia 3 Name: count, dtype: int64
markers.Class.value_counts()
Class Neurons 388 Vascular 30 Oligos 27 Ependymal 18 PeripheralGlia 18 Astrocytes 11 Immune 8 Name: count, dtype: int64
%%capture
sc.tl.rank_genes_groups(adata, 'leiden', method = 'wilcoxon')
%%capture
top_markers = pd.DataFrame()
for cluster in adata.obs['leiden'].unique():
markers_ = adata.uns['rank_genes_groups']['names'][cluster][:10]
scores = adata.uns['rank_genes_groups']['scores'][cluster][:10]
top_markers = pd.concat([top_markers, pd.DataFrame({'cluster': [cluster]*10,
'gene': markers_,
'scores': scores})])
markers = markers.reset_index(drop = True)
top_markers = pd.merge(top_markers, markers, on = 'gene', how = 'left')
marker_table = pd.crosstab(top_markers.cluster, top_markers.Class)
labels = marker_table.idxmax(axis = 1)
labels.index = labels.index.astype(dtype = 'int')
labels = labels.sort_index()
labels.name = 'cluster_type'
adata.obs['cluster'] = adata.obs.leiden.astype('int')
adata.obs = pd.merge(adata.obs, labels, how = 'left', on = 'cluster')
adata.obs['cluster'] = adata.obs.leiden.astype('str') + '-' + adata.obs.cluster_type
sc.pl.umap(adata, color = 'cluster', size = 5)
C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\scanpy\plotting\_tools\scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
sq.pl.spatial_scatter(
adata,
shape = None,
color = 'cluster',
size = .75,
library_id = 'spatial',
figsize = (10,10)
)
C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\squidpy\pl\_spatial_utils.py:956: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored _cax = scatter(
sc.pl.umap(adata, color = 'cluster_type', size = 5)
C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\scanpy\plotting\_tools\scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
sq.pl.spatial_scatter(
adata,
shape = None,
color = 'cluster_type',
size = .5,
library_id = 'spatial',
figsize = (10,10)
)
C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\squidpy\pl\_spatial_utils.py:956: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored _cax = scatter(
Alpha Shape Stuff
%%time
sq.gr.spatial_neighbors(adata, coord_type = 'generic', spatial_key = 'spatial')
sq.gr.nhood_enrichment(adata, cluster_key = 'cluster')
0%| | 0/1000 [00:00<?, ?/s]
CPU times: total: 17.8 s Wall time: 29.7 s
import pickle
with open('adata.pkl','wb') as file:
pickle.dump(adata, file)
alpha = gpd.read_parquet('/Users/DouglasHannumJr/Desktop/s3_bucket_data/s3_bucket_data/all_cell_nbhd.parquet')
Lettuce look at a vascular cell.
cell_colors = dict(zip(adata.obs['cluster_type'].cat.categories.tolist(),
adata.uns['cluster_type_colors']))
adata.obs[['center_x','center_y']] = adata.obsm['spatial']
adata.obs.index = adata.obs.EntityID
adata.obs.to_csv('./updated_cell_metadata.csv')
import importlib
importlib.reload(cmt.viz)
<module 'cormerant.viz' from 'C:\\Users\\DouglasHannumJr\\cormerant-internal\\cormerant\\viz\\__init__.py'>
%%time
cmt.viz.plot_3d_cellPortraits_locations(
md = adata.obs,
cell_idx = adata.obs.index[100:110],
alpha_shape_df=alpha,
cell_idx_groups=adata.obs.cluster_type[100:110],
scale = 10
)
embed
CPU times: total: 703 ms Wall time: 721 ms
%%time
importlib.reload(cmt.viz)
cmt.viz.plot_3d(
directory = './',
cell_idx = 121919181900000610,
local=True,
cell_categories='cluster_type',
window_size=200,
max_number_of_cells=10000,
pl_batch_size = None,
transformation_file = 'images/micron_to_mosaic_pixel_transform.csv',
cell_boundaries_file= 'cell_boundaries.parquet',
metadata_filename = 'updated_cell_metadata.csv',
image_file = 'images/mosaic_DAPI_z0.tif',
transcripts_file = 'partitioned_transcripts.csv',
cell_colors = cell_colors
)
loading metadata loading cell boundaries loading image loading transcripts processing data embed
CPU times: total: 46.8 s Wall time: 21.2 s
import random
random.seed(101)
cell_idx = random.sample(adata.obs.index.to_list(), 9)
cell_idx_groups = adata.obs.loc[cell_idx,'cluster_type']
cell_idx_groups
EntityID 121919181900076205 Neurons 121919181900025544 Neurons 121919181900070708 Neurons 121919181900047035 Neurons 121919181900061255 Neurons 121919181900006364 Immune 121919181900066025 Neurons 121919181900028136 Vascular 121919181900029077 Oligos Name: cluster_type, dtype: category Categories (5, object): ['Astrocytes', 'Immune', 'Neurons', 'Oligos', 'Vascular']
adata.obs.loc[cell_idx,:]
| fov | volume | min_x | min_y | max_x | max_y | anisotropy | transcript_count | perimeter_area_ratio | solidity | EntityID | n_counts | leiden | cluster | cluster_type | center_x | center_y | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| EntityID | |||||||||||||||||
| 121919181900076205 | NaN | 672.675045 | 7326.800148 | 5471.750808 | 7336.865748 | 5483.220408 | 1.177693 | 532 | 0.500019 | 4.906303 | 121919181900076205 | 526.0 | 11 | 11-Neurons | Neurons | 7331.572549 | 5477.365879 |
| 121919181900025544 | NaN | 1325.321019 | 7869.038569 | 3476.540613 | 7882.776169 | 3495.138213 | 1.368497 | 926 | 0.354131 | 4.572747 | 121919181900025544 | 921.0 | 11 | 11-Neurons | Neurons | 7875.936714 | 3485.659940 |
| 121919181900070708 | NaN | 939.646173 | 4442.402818 | 2810.493718 | 4455.168419 | 2822.395319 | 1.563477 | 546 | 0.431894 | 5.730559 | 121919181900070708 | 543.0 | 2 | 2-Neurons | Neurons | 4448.540029 | 2816.330188 |
| 121919181900047035 | NaN | 1359.322747 | 3847.110200 | 2241.227858 | 3861.927800 | 2255.721458 | 1.159034 | 418 | 0.345056 | 5.816319 | 121919181900047035 | 416.0 | 5 | 5-Neurons | Neurons | 3854.774056 | 2248.176539 |
| 121919181900061255 | NaN | 507.194020 | 798.952706 | 4875.546737 | 808.154306 | 4886.584337 | 1.272157 | 316 | 0.630637 | 4.961753 | 121919181900061255 | 315.0 | 9 | 9-Neurons | Neurons | 803.610721 | 4881.899592 |
| 121919181900006364 | NaN | 530.426260 | 5394.095623 | 4250.803436 | 5406.537223 | 4264.649036 | 1.363851 | 150 | 0.776861 | 3.214264 | 121919181900006364 | 150.0 | 12 | 12-Immune | Immune | 5399.282633 | 4258.382778 |
| 121919181900066025 | NaN | 1090.619165 | 6756.297692 | 2712.602579 | 6767.983293 | 2725.476179 | 1.228211 | 579 | 0.382697 | 5.990143 | 121919181900066025 | 578.0 | 9 | 9-Neurons | Neurons | 6761.684680 | 2719.416899 |
| 121919181900028136 | NaN | 319.863988 | 7911.997957 | 1578.393001 | 7918.607557 | 1586.622602 | 1.268324 | 122 | 0.650445 | 4.958105 | 121919181900028136 | 122.0 | 4 | 4-Vascular | Vascular | 7915.275176 | 1582.522094 |
| 121919181900029077 | NaN | 912.352412 | 3380.432441 | 5957.910657 | 3392.982041 | 5974.456258 | 1.363795 | 300 | 0.524158 | 4.337677 | 121919181900029077 | 299.0 | 0 | 0-Oligos | Oligos | 3386.840161 | 5968.146810 |
%%time
importlib.reload(cmt.viz)
cmt.viz.plot_3d_cellPortraits(
directory = './',
grid = (3,3),
cell_idx = cell_idx,
cell_idx_groups = cell_idx_groups,
local=True,
cell_categories='cluster_type',
window_size=50,
pl_batch_size = None,
transformation_file = 'images/micron_to_mosaic_pixel_transform.csv',
cell_boundaries_file= 'cell_boundaries.parquet',
metadata_filename = 'updated_cell_metadata.csv',
image_file = 'images/mosaic_DAPI_z0.tif',
transcripts_file = 'partitioned_transcripts.csv',
cell_colors = cell_colors
)
loading metadata loading cell boundaries loading image processing images loading transcripts processing data embed
CPU times: total: 1min 12s Wall time: 26.2 s
importlib.reload(cmt.viz)
<module 'cormerant.viz' from 'C:\\Users\\DouglasHannumJr\\cormerant-internal\\cormerant\\viz\\__init__.py'>
%%time
cmt.viz.plot_3d_cellPortraits_alpha_minimap(
directory = './',
grid = (3,3),
cell_idx = cell_idx,
cell_idx_groups = cell_idx_groups,
local=True,
cell_categories='cluster_type',
window_size=50,
pl_batch_size = None,
minimap_box_scale=10,
alpha_shape_df = alpha,
transformation_file = 'images/micron_to_mosaic_pixel_transform.csv',
cell_boundaries_file= 'cell_boundaries.parquet',
metadata_filename = 'updated_cell_metadata.csv',
image_file = 'images/mosaic_DAPI_z0.tif',
transcripts_file = 'partitioned_transcripts.csv',
cell_colors = cell_colors
)
loading metadata loading cell boundaries loading image processing images loading transcripts processing data embed
CPU times: total: 57 s Wall time: 27 s